*--------------------------------------------------
* Program Setup
*--------------------------------------------------
set more off            // Disable partitioned output
clear all               // Start with a clean slate
set linesize 80         // Line size limit to make output more readable
macro drop _all         // Clear all macros
*--------------------------------------------------

*--------------------------------------------------
* Global macros
*--------------------------------------------------

* Working directory (change this to your working directory)
global workpath "D:\Dropbox\Research\Projects\LocalPolitics\drafts\paper\rcfs\final\replication"

* Open log
* A log left open by an earlier, interrupted run makes -log using- stop with
* "log file already open". Close any open log first; -capture- turns the
* close into a no-op when no log is open.
cd "$workpath"
capture log close
log using 1_prepdata, replace

**********************************************************************************************
/*************************************** Create synthetic data ************************************/
************************************************************************************************

/* This section has been commented out - it was used to create the synthetic CRSP and
Compustat data files
*/

// *--------------------------------------------------
// * Synthetic Compustat fundamentals data 
// *--------------------------------------------------

// * Load
// cd "$workpath"
// use raw\firmpanel\compustat_funda.dta, clear

// * Set randomization seed
// set seed 12345

// * Generate synthetic variables (independent draws) preserving mean and variance
// foreach var of varlist sale tq capx_lassets xsga_lassets xrd_lassets sale_gr ebitda_lassets profit levltb {
//     quietly summarize `var'
//     local mean_`var' = r(mean)
//     local sd_`var' = r(sd)    
//     gen synth_`var' = rnormal(`mean_`var'', `sd_`var'')
// }

// * Drop originals and rename synthetic variables
// drop sale tq capx_lassets xsga_lassets xrd_lassets sale_gr ebitda_lassets profit levltb 
// rename synth_* *

// * Save
// compress
// save raw\firmpanel\compustat_funda_syn.dta, replace

// *--------------------------------------------------
// * Synthetic Compustat tax data 
// *--------------------------------------------------

// cd "$workpath"
// use raw\firmpanel\compustat_taxvars.dta, clear

// * Set randomization seed
// set seed 12345

// * Generate synthetic variables (independent draws) preserving mean and variance
// foreach var of varlist txfed_etr txs_etr txfo_etr {
//     quietly summarize `var'
//     local mean_`var' = r(mean)
//     local sd_`var' = r(sd)    
//     gen synth_`var' = rnormal(`mean_`var'', `sd_`var'')
// }


// * Drop originals and rename synthetic variables
// drop txfed_etr txs_etr txfo_etr 
// rename synth_* *

// * Save
// compress
// save raw\firmpanel\compustat_taxvars_syn.dta, replace

// *--------------------------------------------------
// * Synthetic CRSP returns data 
// *--------------------------------------------------

// cd "$workpath"
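// * NOTE(review): this loads the *_syn file that the Save step below overwrites,
// * unlike the two Compustat sections, which load the original file and save a
// * *_syn copy. Presumably the original CRSP file was the intended input -- confirm.
// use raw\returns\crsp_locationlinked_syn.dta, clear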

// * Set randomization seed
// set seed 12345

// * Generate synthetic variables (independent draws) preserving mean and variance
// foreach var of varlist retadj {
//     quietly summarize `var'
//     local mean_`var' = r(mean)
//     local sd_`var' = r(sd)    
//     gen synth_`var' = rnormal(`mean_`var'', `sd_`var'')
// }

// * Drop originals and rename synthetic variables
// drop retadj
// rename synth_* *

// * Save
// compress
// save raw\returns\crsp_locationlinked_syn.dta, replace

**********************************************************************************************
/*************************************** Build firm panel ************************************/
************************************************************************************************

*--------------------------------------------------
* Assemble the firm-year panel: Compustat plus linked datasets
*--------------------------------------------------

* Start from the synthetic Compustat fundamentals file
cd "$workpath"
use raw\firmpanel\compustat_funda_syn.dta, clear

* Headquarters state: keep only firm-years found in both files
merge 1:1 gvkey fyear using raw\firmpanel\compustat_states.dta, ///
	update assert(1 2 3) keep(3)
tabulate fyear _merge
drop _merge

* State partisan affiliation, keyed on state abbreviation and year
* (year is a copy of fyear; st_abbr is renamed to match the using file's key)
generate year = fyear
rename st_abbr stabb
merge m:1 stabb year using raw\statepartisan.dta, ///
	update assert(1 2 3) keep(3)
drop _merge

* Senate tightness (year level): assert(2 3) requires every firm-year to match
merge m:1 year using raw\senatetightness.dta, ///
	update assert(2 3) keep(3) keepusing(*tight*)
drop _merge

* Closeness of the last presidential election: preselectyr is the year
* rounded down to a multiple of four; unmatched firm-years are retained
generate preselectyr = 4*floor(year/4)
merge m:1 st_fips preselectyr using raw\preselectcloseness.dta, ///
	update assert(1 2 3) keep(1 3)
drop _merge

* Committee chairmanships
* NOTE(review): the key is spelled "stab" although the state abbreviation was
* renamed to "stabb" above; presumably this relies on Stata's variable-name
* abbreviation -- confirm against raw/committeechairs.dta.
merge m:1 stab year using raw/committeechairs.dta, ///
	update assert(1 2 3) keep(1 3) keepusing(*chair*)
drop _merge
rename chairpersonships chaircount
rename chairpersonships_top*committees chaircount_top*
* Indicator for holding at least one chair; missing where the count is missing
foreach sfx in "" "_top1" "_top3" "_top5" "_top10" {
	generate haschair`sfx' = chaircount`sfx'>=1 if !missing(chaircount`sfx')
}

* Regulation data, keyed on a copy of the 4-digit NAICS code and year
generate naics = naicsh4dN
merge m:1 naics year using raw\regdata_naics.dta, ///
	update assert(1 2 3) keep(1 3)
tabulate fyear _merge
drop _merge

* Trade data: the using file names the industry key naics4, so rename the
* temporary key before merging and discard it afterwards
rename naics naics4
merge m:1 naics4 year using raw\usimports_indyr.dta, ///
	update assert(1 2 3) keep(1 3)
drop _merge naics4

* Tax variables (synthetic Compustat tax file)
merge 1:1 gvkey fyear using raw\firmpanel\compustat_taxvars_syn.dta, ///
	update assert(1 2 3) keep(1 3)
drop _merge

* Firm-level subsidy and loan amounts plus the matched-to-subsidies flag
merge 1:1 gvkey fyear using raw\firmpanel\firm_subsidydata.dta, ///
	update assert(1 2 3) keep(1 3) ///
	keepusing(loan_amt* subsidy_amt* matched2subsidies)
tabulate fyear _merge
drop _merge

* Restrict the sample to fiscal years 1994 through 2021
drop if !inrange(fyear,1994,2021)

*--------------------------------------------------
* Define new variables: subsidies
*--------------------------------------------------

* Row totals across subsidy and loan columns (rowtotal treats missing as zero)
egen subsidyamt_tot = rowtotal(subsidy_amt*)
egen loanamt_tot = rowtotal(loan_amt*)
egen subsidyamt_allgrants = rowtotal(subsidy_amt4 subsidy_amt8)
egen subsidyamt_tax = rowtotal(subsidy_amt15 subsidy_amt18)

* Give the individual columns that are kept descriptive names, then drop
* every remaining raw subsidy_amt* and loan_amt* column
rename (subsidy_amt4 subsidy_amt6 subsidy_amt8 subsidy_amt15 subsidy_amt18) ///
	(subsidyamt_fedgrant loanamt_fed subsidyamt_grants subsidyamt_proptax subsidyamt_taxcredit)
drop subsidy_amt* loan_amt*

* Amounts scaled by sales (and divided by 1,000,000); a missing amount with
* non-missing sales is set to zero
foreach amt of varlist subsidyamt* loanamt* {
	generate `amt'_sale = (`amt'/sale)/1000000
	replace `amt'_sale = 0 if !missing(sale) & missing(`amt')
}

* Extensive margin: 1 if a strictly positive, non-missing amount of the given type
foreach kind in "grants" "fedgrant" "allgrants" "proptax" "taxcredit" "tax" {
	generate subsidyrec_`kind' = !missing(subsidyamt_`kind') & subsidyamt_`kind'>0
}
rename matched2subsidies subsidyrec_all
replace subsidyrec_all = 0 if missing(subsidyrec_all)

* Firm-level flags: 1 if the firm has the receipt indicator on in any firm-year
foreach kind in taxcredit proptax grants fedgrant {
	egen subsidizedfirm_`kind' = max(subsidyrec_`kind'), by(gvkey)
}
egen subsidizedfirm = max(subsidyrec_all), by(gvkey)

* Industry filter: flags SIC codes 6000-6999, 4900-4999, and 9000 or above
* (a missing SIC code is not flagged)
generate indfilter = inrange(sich4dN,6000,6999) | inrange(sich4dN,4900,4999) | (sich4dN>=9000 & !missing(sich4dN))

* Industry-by-year group identifier
egen indyr = group(naicsh4dN fyear)

* Winsorize in place (winsor2 is not a built-in command and must be installed)
winsor2 *_lassets lev* profit* *_gr tx*, cuts(1 99) replace
winsor2 tq, cuts(1 98) replace

* Natural logs
foreach x of varlist sentenure sale_gr {
	generate ln_`x' = ln(`x')
}

* Tradable-industry indicator (Mian and Sufi); unmatched firm-years are kept
merge m:1 naicsh4dN using raw\tradable_miansufi.dta, keep(1 3) keepusing(tradeable)
drop _merge
rename tradeable tradable

* Swing state dummy: 1 if the vote margin is strictly below that fiscal
* year's median, missing if the margin is missing
local margin "absvotediff"
egen temp = pctile(`margin'), p(50) by(fyear)
generate swingstate = `margin' < temp if !missing(`margin')
drop temp

* Default benchmark RHS variables
generate tight = tightsenate6
generate pivotal = swingstate
generate pivotal_tight = pivotal * tight

* Senate control by year, and whether the state's senators match the
* controlling party (majpty) or the other party (minpty)
generate senateD = inrange(year,1993,1994) | inrange(year,2007,2014)
generate senateR = inrange(year,1995,2000) | inrange(year,2003,2006) | inrange(year,2015,2020)
generate majpty = (senatorsD == 1 & senateD == 1) | (senatorsR == 1 & senateR == 1)
generate minpty = (senatorsD == 1 & senateR == 1) | (senatorsR == 1 & senateD == 1)

* Declare the firm-year panel
sort gvkey fyear
xtset gvkey fyear

*--------------------------------------------------
* Interaction variables
*--------------------------------------------------

* Tradable industry: triple interaction and the two pairwise terms
generate pivotal_tradable_tight = pivotal * tradable * tight
generate pivotal_tradable = pivotal * tradable
generate tradable_tight = tradable * tight

* Regulation ("reg") and trade ("hightrade") dummies and their interactions.
* Each dummy is 1 if the source variable is at or above that fiscal year's
* median, and missing if the source variable is missing.
local sources "industry_restrictions_2_0 ipr"
local dummies "reg hightrade"
forvalues k = 1/2 {
	local src : word `k' of `sources'
	local dum : word `k' of `dummies'

	* Median split within fiscal year
	egen temp = pctile(`src'), p(50) by(fyear)
	generate `dum' = `src' >= temp if !missing(`src')
	drop temp

	* Interaction terms
	generate pivotal_`dum'_tight = pivotal * `dum' * tight
	generate pivotal_`dum' = pivotal * `dum'
	generate `dum'_tight = `dum' * tight
}

*--------------------------------------------------
* Save
*--------------------------------------------------

* Shrink storage types, confirm gvkey-fyear identifies observations, write file
compress
isid gvkey fyear
save intermediate\localpolitics_firmpanel.dta, replace

**********************************************************************************************
/*************************************** Build state panel ************************************/
************************************************************************************************

*--------------------------------------------------
* Assemble the state-year panel
*--------------------------------------------------

* Start from the QCEW state panel, from 1992 onwards
cd "$workpath"
use raw\statepanel\qcew_statepanel.dta, clear
drop if year < 1992

* State abbreviation; assert(2 3) requires every state-year to match
merge m:1 st_fips using raw\state_codes.dta, ///
	update assert(2 3) keep(3) keepusing(st_abbr)
drop _merge

* State partisan information (the using file keys on stabb)
rename st_abbr stabb
merge m:1 stabb year using raw\statepartisan.dta, ///
	update assert(1 2 3) keep(3)
tabulate year _merge
drop _merge

* Senate tightness
merge m:1 year using raw\senatetightness.dta, ///
	update assert(2 3) keep(3)
tabulate year _merge
drop _merge

* Closeness of the last presidential election: preselectyr is the year
* rounded down to a multiple of four
generate preselectyr = 4*floor(year/4)
merge m:1 st_fips preselectyr using raw\preselectcloseness.dta, ///
	update assert(1 2 3) keep(1 3)
tabulate year _merge
drop _merge

* Lagged population: shift year back by one so the merge picks up last
* year's population, check every row matched, then restore year
replace year = year - 1
merge m:1 st_fips year using raw\statepanel\statepop.dta, ///
	update assert(1 2 3) keep(1 3) keepusing(pop)
assert _merge == 3 | year < 1990
drop _merge
replace year = year + 1

* QWI variables
merge 1:1 st_fips year using raw\statepanel\qwi_state_firmsize.dta, ///
	update assert(1 2 3) keep(1 3)
tabulate year _merge
drop _merge

* BEA real GDP
merge 1:1 st_fips year using raw\statepanel\gdp_state.dta, ///
	update assert(1 2 3) keep(1 3) keepusing(gdp_real)
tabulate year _merge
drop _merge

* Government spending: rename amtrl* to amtspend*, total across all spending
* columns, then drop the categories that are not kept individually
merge 1:1 st_fips year using raw\statepanel\govspend_statepanel.dta, ///
	update assert(2 3) keep(3) keepusing(amt*)
drop _merge
rename amtrl* amtspend*
egen amtspend_tot = rowtotal(amtspend_*)
drop *coopagmt *specpayment *unrespayment *insurance

*--------------------------------------------------
* Winsorize and define new variables
*--------------------------------------------------

* Winsorize in place (winsor2 is not a built-in command and must be installed)
winsor2 estabs* wages* emp* qemp* , cuts(1 99) replace
winsor2 amtspend*, cuts(1 99) replace

* Natural logs (taken before the unit changes further down)
foreach x of varlist estabs_* emp_* wages_* qemp*  gdp_real sentenure amtspend* pop {
	generate ln_`x' = ln(`x')
}

* Senate control by year, and whether the state's senators match the
* controlling party (majpty) or the other party (minpty)
generate senateD = inrange(year,1993,1994) | inrange(year,2007,2014)
generate senateR = inrange(year,1995,2000) | inrange(year,2003,2006) | inrange(year,2015,2020)
generate majpty = (senatorsD == 1 & senateD == 1) | (senatorsR == 1 & senateR == 1)
generate minpty = (senatorsD == 1 & senateR == 1) | (senatorsR == 1 & senateD == 1)

* Swing state dummy: 1 if the vote margin is strictly below that year's
* median, missing if the margin is missing
local margin "absvotediff"
egen temp = pctile(`margin'), p(50) by(year)
generate swingstate = `margin' < temp if !missing(`margin')
drop temp

* Default benchmark RHS variables
generate tight = tightsenate6
generate pivotal = swingstate
generate pivotal_tight = pivotal * tight

*--------------------------------------------------
* Change units for summary statistics
*--------------------------------------------------

* Divide by one billion
foreach x of varlist amtspend_tot amtspend_contracts amtspend_grants amtspend_loan amtspend_payment wages_priv {
	replace `x' = `x'/1000000000
}

* Divide by one million
foreach x of varlist emp_priv estabs_priv {
	replace `x' = `x'/1000000
}

* GDP is already in millions - divide by 1,000 to express in $billions
replace gdp_real = gdp_real/1000

*--------------------------------------------------
* Save data
*--------------------------------------------------

* Keep 1994 onwards
drop if year<1994

* Save
compress
save intermediate\localpolitics_statepanelfinal.dta, replace

**********************************************************************************************
/*************************************** Build legvoting panel ************************************/
************************************************************************************************

*--------------------------------------------------
* Bill-level voting data linked to election closeness
*--------------------------------------------------

* Load bill-level voting records
use raw\legvoting\legvoting_billlvl.dta, clear

* State FIPS code; assert(2 3) requires every record to match
merge m:1 st_abbr using raw\state_codes.dta, ///
	update assert(2 3) keep(3) keepusing(st_fips)
drop _merge

* Closeness of the last presidential election: 1787 + 2*congress is rounded
* down to a multiple of four
* NOTE(review): presumably 1787 + 2*congress is the calendar year in which
* that Congress convened -- confirm.
generate preselectyr = 4*floor((1787 + 2 * congress)/4)
merge m:1 st_fips preselectyr using raw/preselectcloseness.dta, ///
	update assert(1 2 3) keep(1 3)
tabulate congress _merge
drop _merge

* Deviation from the party median: votedev is 1 when the member's cast_code
* is 1 while the median for that congress/party/roll call is 6, or vice versa
* NOTE(review): presumably 1 and 6 are the yea/nay cast codes -- confirm.
egen votemed = median(cast_code), by(congress party_code rollnumber)
generate votedev = (cast_code == 1 & votemed == 6) | (cast_code == 6 & votemed == 1)

* Save
compress
save intermediate\localpolitics_legovtes_billfinal.dta, replace

**********************************************************************************************
/*************************************** Build returns data ************************************/
************************************************************************************************

*--------------------------------------------------
* Link CRSP returns with factor returns and state politics
*--------------------------------------------------

* Load the (synthetic) location-linked CRSP returns
cd "$workpath"
use raw\returns\crsp_locationlinked_syn.dta, clear
// assert year == 2020

* Daily Fama-French factors; assert(2 3) requires every return date to match
merge m:1 date using raw\returns\ff3_daily.dta, ///
	update assert(2 3) keep(3) keepusing(mrp smb hml rf)
drop _merge

* Divide the factor returns by 100
foreach factor of varlist mrp smb hml rf {
	replace `factor' = `factor' / 100
}

* State-level politics (the using file keys on stabb)
rename st_abbr stabb
merge m:1 stabb year using raw\statepartisan.dta, ///
	update assert(1 2 3) keep(3)
drop _merge

* Closeness of the 2020 presidential election for every observation
generate preselectyr = 2020
merge m:1 st_fips preselectyr using raw\preselectcloseness.dta, ///
	update assert(1 2 3) keep(1 3)
drop _merge

* Exclude Georgia-located firms, then order by firm and date
drop if stabb == "GA"
sort permno date

*--------------------------------------------------
* Define windows
*--------------------------------------------------

* Event date is 5 January 2021. eventdate is the calendar-day offset;
* eventtdate comes from -workdays-, which is not a built-in command.
* NOTE(review): presumably eventtdate is the offset in working days -- confirm.
generate elecdate = mdy(1,5,2021)
generate eventdate = date - elecdate
workdays elecdate date, gen(eventtdate)

* Estimation window [-220,-11]; short event window [-10,10]; long [-10,80]
generate estWindow = inrange(eventtdate,-220,-11)
generate eventWindowSR = inrange(eventtdate,-10,10)
generate eventWindowLR = inrange(eventtdate,-10,80)

* Require each firm's observed event days to span the full range from -220 to +80
egen eventday_max = max(eventtdate), by(permno)
egen eventday_min = min(eventtdate), by(permno)
drop if eventday_max < 80 | eventday_min > -220
drop eventday_max eventday_min

** Retain only observations inside the estimation or long event window
keep if estWindow | eventWindowLR
sort permno date

* Define returns: excess return over the risk-free rate, and the raw market
* return (market risk premium plus risk-free rate)
gen exret = retadj - rf
gen mktret = mrp + rf

* Calculate expected and abnormal returns, firm by firm
*
* Three models are fitted on each firm's estimation window:
*   capm: exret  on mrp
*   ff3 : exret  on mrp smb hml
*   mkt : retadj on mktret
* and ar_<model> holds the residual for all of that firm's observations
* (estimation and event window alike).
*
* -predict- always uses the most recent estimation results. Running
* "bysort permno: reg ..." followed by one -predict- would therefore apply the
* LAST firm's coefficients to every firm. Each firm is instead estimated and
* predicted separately, over its own contiguous block of observations.
sort permno date
local lhsvar1 "exret"
local lhsvar2 "exret"
local lhsvar3 "retadj"
local factors1 "mrp"
local factors2 "mrp smb hml"
local factors3 "mktret"
local type1 "capm"
local type2 "ff3"
local type3 "mkt"

* Pre-create the outputs so firms whose regression cannot be run stay missing
forvalues i=1/3 {
	gen ar_`type`i'' = .
}

* Number of observations per firm, used to step through the sorted data
tempvar nobs resid
by permno: gen long `nobs' = _N

local first = 1
while `first' <= _N {
	* Observations `first'..`last' belong to a single permno
	local last = `first' + `nobs'[`first'] - 1
	forvalues i=1/3 {
		capture regress `lhsvar`i'' `factors`i'' if estWindow == 1 in `first'/`last'
		local rc = _rc
		* r(2000)/r(2001): no or insufficient observations for this firm;
		* leave its abnormal return missing. Any other error is re-raised.
		if inlist(`rc', 2000, 2001) {
			continue
		}
		if `rc' {
			error `rc'
		}
		quietly predict `resid' in `first'/`last', residuals
		quietly replace ar_`type`i'' = `resid' in `first'/`last'
		drop `resid'
	}
	local first = `last' + 1
}
drop `nobs'

* Restrict to the long event window
keep if eventWindowLR

* Cumulative (running-sum) abnormal and raw returns within firm, in date order
sort permno date
foreach model in "capm" "ff3" "mkt" {
	by permno (date): generate car_`model' = sum(ar_`model')
}
by permno (date): generate cumret = sum(retadj)

* Order by firm and event day; confirm permno-date identifies observations
sort permno eventdate
compress
isid permno date

* Swing state dummy: 1 if the vote margin is strictly below the median taken
* over all observations in memory, missing if the margin is missing
local margin "absvotediff"
egen temp = pctile(`margin'), p(50)
generate swingstate = `margin' < temp if !missing(`margin')
drop temp

* Save
compress
save intermediate\eventstudy_abnormalrets_final.dta, replace

*--------------------------------------------------
* Collapse for plotting
*--------------------------------------------------

* Reload the firm-day event-study file
cd "$workpath"
use intermediate\eventstudy_abnormalrets_final.dta, clear

* For each series and each date, run a two-group t-test by swingstate and
* store the difference in group means (second group minus first) and its
* standard error on every observation of that date
tempname gap gapse
foreach series of varlist retadj ar_capm ar_ff3 cumret car_capm car_ff3 car_mkt {
	generate `series'_diff = .
	generate `series'_diffse = .
	levelsof date, local(thedates)
	foreach day of local thedates {
		quietly ttest `series' if date==`day', by(swingstate)
		scalar `gap' = r(mu_2) - r(mu_1)
		scalar `gapse' = r(se)
		replace `series'_diff = `gap' if date==`day'
		replace `series'_diffse = `gapse' if date==`day'
	}
}

* Collapse to one row per date and swingstate group: first event-day
* counters, means of every series, and standard errors of the mean (sem_*)
collapse ///
	(first) event*date ///
	(mean) retadj* ar_capm* ar_ff3* cum*ret* car_* ///
	(semean) sem_retadj=retadj sem_ar_capm=ar_capm sem_ar_ff3=ar_ff3 ///
	sem_cumret=cumret sem_car_capm=car_capm sem_car_ff3=car_ff3 sem_car_mkt=car_mkt, ///
	by(date swingstate)

* Bounds around each group mean: mean +/- 1.96 standard errors
foreach series in retadj ar_capm ar_ff3 cumret car_capm car_ff3 car_mkt {
	generate `series'_ub = `series' + 1.96 * sem_`series'
	generate `series'_lb = `series' - 1.96 * sem_`series'
}
drop sem*

* Bounds around each between-group difference: diff +/- 1.96 standard errors
foreach series of varlist retadj ar_capm ar_ff3 cumret car_capm car_ff3 car_mkt {
	generate `series'_diffub = `series'_diff + 1.96 * `series'_diffse
	generate `series'_difflb = `series'_diff - 1.96 * `series'_diffse
}
drop *_diffse

* One row per event day, with a 0/1 suffix for the swingstate group
reshape wide cum*ret* car* retadj* ar_capm* ar_ff3*, i(eventtdate) j(swingstate)

* The difference columns are identical across the two groups: keep the copy
* with suffix 0 and strip the suffix
drop *diff1 *diffub1 *difflb1
rename *diff0 *diff
rename *diffub0 *diffub
rename *difflb0 *difflb

* Save
compress
save intermediate\eventstudy_abnormalrets_plots.dta, replace

**********************************************************************************************
/******************************** Build files for political TS ******************************/
**********************************************************************************************

*--------------------------------------------------
* PRESELECTCLOSENESS_ANNUALAVG.DTA
*--------------------------------------------------

* Average absolute vote margin per presidential election year
cd "$workpath"
use raw/preselectcloseness.dta, clear
collapse (mean) absvotediff, by(preselectyr)

* Confirm one row per election year, then save
compress
isid preselectyr
save intermediate/preselectcloseness_annualavg.dta, replace

*--------------------------------------------------
* SENATETIGHTNESS_TSANALYSIS.DTA
*--------------------------------------------------

* Start from the yearly Senate tightness file
cd "$workpath"
use raw/senatetightness.dta, clear

* Real GDP; assert(2 3) requires every year to match
merge m:1 year using raw\realgdp.dta, ///
	update assert(2 3) keep(3) keepusing(realgdp*)
drop _merge

* Unemployment rate; every year must match
merge m:1 year using raw\unrate.dta, ///
	update assert(2 3) keep(3) keepusing(unrate)
drop _merge

* Average closeness of the last presidential election: preselectyr is the
* year rounded down to a multiple of four
generate preselectyr = 4*floor(year/4)
merge m:1 preselectyr using intermediate/preselectcloseness_annualavg.dta, ///
	update assert(1 2 3) keep(1 3)
drop _merge

* Transform variables
replace realgdp_gr = realgdp_gr - 1 // net rate
replace unrate = unrate/100 // pct term

* Regressions of each macro series on the tight-Senate indicator, 1994-2020
foreach outcome in realgdp_gr unrate {
	regress `outcome' tightsenate6 if inrange(year,1994,2020)
}

* T-tests by the tight-Senate indicator, 1994-2020
foreach outcome in realgdp_gr unrate absvotediff {
	ttest `outcome' if inrange(year,1994,2020), by(tightsenate6)
}

* Scaled-down copy of tightsenate6 (divided by 6)
generate tightsenateshade = tightsenate6/6

* Confirm one row per year, then save
compress
isid year
save intermediate/senatetightness_tsanalysis.dta, replace

*------------------------------------
* SWINGSTATE_PANEL.CSV
*------------------------------------

* Load the final state panel and keep only the variables needed here
cd "$workpath"
use intermediate/localpolitics_statepanelfinal.dta, clear
keep st_fips stabb year congress tightsenate* absvotediff

* CALCULATE SWING STATE BY ABSVOTEDIFF

* Swing state dummy: 1 if the vote margin is strictly below that year's
* median, missing if the margin is missing
local margin "absvotediff"
egen temp = pctile(`margin'), p(50) by(year)
generate swingstate = `margin' < temp if !missing(`margin')
drop temp

* Ever a swing state: the maximum of swingstate over all of the state's years
egen everswingstate = max(swingstate), by(st_fips)

* CREATE DATASET WITH SWING STATE SPELLS

* Within each state, in year order, spell starts at 1 and increases by one
* whenever swingstate differs from the previous year's value
sort stabb year
by stabb: generate spell = 1 if _n == 1
by stabb: replace spell = spell[_n-1] + (swingstate != swingstate[_n-1]) if _n > 1

rename stabb state

export delimited state year absvotediff swingstate everswingstate spell using intermediate/swingstate_panel.csv, replace

************************************************************************************************************

* Close log
log close
